/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package flink.parquet.tpcds;

import flink.parquet.thrift.*;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import parquet.filter2.predicate.FilterPredicate;
import parquet.hadoop.ParquetInputFormat;
import parquet.hadoop.thrift.ParquetThriftInputFormat;
import parquet.hadoop.thrift.ThriftReadSupport;

import java.io.IOException;

import static parquet.filter2.predicate.FilterApi.and;
import static parquet.filter2.predicate.FilterApi.longColumn;
import static parquet.filter2.predicate.FilterApi.eq;
import static parquet.filter2.predicate.Operators.LongColumn;

/**
 * This program implements a modified version of the TPC-DS query 55. The
 * example demonstrates how to assign names to fields by extending the Tuple class.
 * The original query can be found at
 * <a href="http://www.tpc.org/tpc_documents_current_versions/pdf/tpcds_1.3.1.pdf">http://www.tpc
 * .org/tpc_documents_current_versions/pdf/tpcds_1.3.1.pdf</a>.
 * <p/>
 * <p/>
 * This program implements the following SQL equivalent:
 * <p/>
 * <p/>
 * <code><pre>
 * select  i_brand_id brand_id, i_brand brand,
 * sum(ss_ext_sales_price) ext_price
 * from date_dim, store_sales, item
 * where date_dim.d_date_sk = store_sales.ss_sold_date_sk
 * and store_sales.ss_item_sk = item.i_item_sk
 * and i_manager_id=28
 * and d_moy=11
 * and d_year=1999
 * group by i_brand, i_brand_id
 * order by ext_price desc, i_brand_id
 * limit 100 ;
 * </pre></code>
 * <p/>
 * <p/>
 * Compared to the original TPC-DS query this version does neither sort nor limit the result.
 * <p/>
 * <p/>
 * Input files are Parquet files containing Thrift records, generated from the output of the
 * TPC-DS data generator which is available at <a href="http://www.tpc.org/tpcds/">http://www.tpc
 * .org/tpcds/</a>.
 * <p/>
 * <p/>
 * Usage: <code>TPCDSQuery55Parquet &lt;date_dim-parquet path&gt; &lt;store_sales-parquet path&gt; &lt;item-parquet
 * path&gt; &lt;result path&gt;</code><br>
 * <p/>
 * <p/>
 * This example shows how to use:
 * <ul>
 * <li> custom data type derived from tuple data types
 * <li> inline-defined functions
 * <li> build-in aggregation functions
 * </ul>
 */
@SuppressWarnings("serial")
public class TPCDSQuery55Parquet {

	// *************************************************************************
	//     PROGRAM
	// *************************************************************************

	/**
	 * Entry point: reads the three TPC-DS tables from Parquet/Thrift files, runs the
	 * modified query 55 (join date_dim -> store_sales -> item, sum of ss_ext_sales_price
	 * grouped by brand) and prints the aggregated result to stdout.
	 *
	 * @param args date_dim-parquet path, store_sales-parquet path, item-parquet path, result path
	 * @throws Exception if the Flink job fails
	 */
	public static void main(String[] args) throws Exception {

		long startTime = System.currentTimeMillis();

		if (!parseParameters(args)) {
			return;
		}

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data; projection (and, for date_dim/item, filtering) is pushed into the Parquet reader
		DataSet<DataDim> dataDims = getDataDimDataSet(env).map(new MapDataDim());
		DataSet<Item> item = getItemDataSet(env).map(new MapItem());
		DataSet<StoreSales> storeSales = getStoreSalesDataSet(env).map(new MapStoreSales());

		// d_date_sk = ss_sold_date_sk, then ss_item_sk = i_item_sk;
		// group by (i_brand, i_brand_id) and sum ss_ext_sales_price
		dataDims.join(storeSales).where(0).equalTo(0).with(new DataDimAndStoreSales())
			.join(item).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
			.groupBy(1, 0).aggregate(Aggregations.SUM, 2)
			.print();

		// execute program
		// NOTE(review): outputPath is parsed but never used; the result goes to stdout via
		// print() above — confirm whether a file sink to <result path> was intended.
		env.execute("TPC-DS Query 55 Example with Parquet input");

		System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
	}

	/** Joins date_dim with store_sales on the date key and keeps (ss_ext_sales_price, ss_item_sk). */
	public static class DataDimAndStoreSales implements JoinFunction<DataDim, StoreSales, Tuple2<Double, Long>> {
		@Override
		public Tuple2<Double, Long> join(DataDim d, StoreSales s) {
			return new Tuple2<Double, Long>(s.getSs_ext_sales_price(), s.getSs_item_sk());
		}
	}

	/** Joins the (price, item_sk) pairs with item and emits (i_brand_id, i_brand, price). */
	public static class DataDimAndStoreSalesAndItems implements JoinFunction<Tuple2<Double, Long>, Item, Tuple3<Long, 
		String, Double>> {
		@Override
		public Tuple3<Long, String, Double> join(Tuple2<Double, Long> twoTables, Item i) {
			return new Tuple3<Long, String, Double>(i.getI_brand_id(), i.getI_brand(), twoTables.f0);
		}
	}


	// *************************************************************************
	//     UTIL METHODS
	// *************************************************************************

	private static String datadimPath;
	private static String storesalesPath;
	private static String itemPath;
	// accepted for interface compatibility; currently unused (see note in main)
	private static String outputPath;

	/**
	 * Parses the four expected path arguments into the static path fields.
	 *
	 * @param programArguments command-line arguments as passed to {@link #main(String[])}
	 * @return true if exactly four paths were supplied, false otherwise (a usage message
	 *         is printed to stderr in that case)
	 */
	private static boolean parseParameters(String[] programArguments) {

		if (programArguments.length > 0) {
			if (programArguments.length == 4) {
				datadimPath = programArguments[0];
				storesalesPath = programArguments[1];
				itemPath = programArguments[2];
				outputPath = programArguments[3];
			} else {
				// FIX: the usage message previously named TPCHQuery3 and its CSV inputs
				System.err.println("Usage: TPCDSQuery55Parquet <date_dim-parquet path> " +
					"<store_sales-parquet path> <item-parquet path> <result path>");
				return false;
			}
		} else {
			// FIX: this message previously referred to the TPC-H benchmark and generator
			System.err.println("This program expects data from the TPC-DS benchmark as input data.\n" +
				"  Due to legal restrictions, we can not ship generated data.\n" +
				"  You can find the TPC-DS data generator at http://www.tpc.org/tpcds/.\n" +
				"  Usage: TPCDSQuery55Parquet <date_dim-parquet path> <store_sales-parquet path> " +
				"<item-parquet path> <result path>");
			return false;
		}
		return true;
	}


	/** Extracts (d_date_sk, d_year, d_moy) from a date_dim Thrift record. */
	private static final class MapDataDim implements MapFunction<Tuple2<Void, DateDimTable>, DataDim> {

		@Override
		public DataDim map(Tuple2<Void, DateDimTable> value) {
			DataDim tuple = new DataDim();
			tuple.f0 = value.f1.getD_date_sk();
			tuple.f1 = value.f1.getD_year();
			tuple.f2 = value.f1.getD_moy();
			return tuple;
		}
	}

	/** Extracts (i_item_sk, i_brand_id, i_brand, i_manager_id) from an item Thrift record. */
	private static final class MapItem implements MapFunction<Tuple2<Void, ItemTable>, Item> {
		@Override
		public Item map(Tuple2<Void, ItemTable> value) {
			Item tuple = new Item();
			tuple.f0 = value.f1.getI_item_sk();
			tuple.f1 = value.f1.getI_brand_id();
			tuple.f2 = value.f1.getI_brand();
			tuple.f3 = value.f1.getI_manager_id();
			return tuple;
		}
	}

	/** Extracts (ss_sold_date_sk, ss_item_sk, ss_ext_sales_price) from a store_sales Thrift record. */
	private static final class MapStoreSales implements MapFunction<Tuple2<Void, StoreSalesTable>, StoreSales> {
		@Override
		public StoreSales map(Tuple2<Void, StoreSalesTable> value) {
			StoreSales tuple = new StoreSales();
			tuple.f0 = value.f1.getSs_sold_date_sk();
			tuple.f1 = value.f1.getSs_item_sk();
			tuple.f2 = value.f1.getSs_ext_sales_price();
			return tuple;
		}
	}


	/**
	 * Reads the date_dim table, projecting to d_date_sk/d_year/d_moy and pushing the
	 * predicate d_moy = 11 AND d_year = 1999 into the Parquet reader.
	 */
	private static DataSet<Tuple2<Void, DateDimTable>> getDataDimDataSet(ExecutionEnvironment env) throws IOException {
		Job job = Job.getInstance();

		//Schema projection
		ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
		job.getConfiguration().set("parquet.thrift.column.filter", "d_date_sk;d_year;d_moy");

		// FIX: typed instead of raw HadoopInputFormat/ParquetThriftInputFormat
		HadoopInputFormat<Void, DateDimTable> hadoopInputFormat = new HadoopInputFormat<Void, DateDimTable>(
			new ParquetThriftInputFormat<DateDimTable>(), Void.class, DateDimTable.class, job);

		// Filter: d_moy = 11 AND d_year = 1999
		LongColumn moy = longColumn("d_moy");
		LongColumn year = longColumn("d_year");
		FilterPredicate moyPred = eq(moy, 11L);
		FilterPredicate yearPred = eq(year, 1999L);
		FilterPredicate constraint = and(moyPred, yearPred);
		ParquetThriftInputFormat.setFilterPredicate(job.getConfiguration(), constraint);

		ParquetThriftInputFormat.addInputPath(job, new Path(datadimPath));

		return env.createInput(hadoopInputFormat);
	}

	/**
	 * Reads the store_sales table, projecting to ss_sold_date_sk/ss_item_sk/ss_ext_sales_price.
	 * No row filter applies to this table in query 55.
	 */
	private static DataSet<Tuple2<Void, StoreSalesTable>> getStoreSalesDataSet(ExecutionEnvironment env) throws 
		IOException {
		Job job = Job.getInstance();

		//Schema projection
		ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
		job.getConfiguration().set("parquet.thrift.column.filter", "ss_sold_date_sk;ss_item_sk;ss_ext_sales_price");

		// FIX: typed instead of raw HadoopInputFormat/ParquetThriftInputFormat
		HadoopInputFormat<Void, StoreSalesTable> hadoopInputFormat = new HadoopInputFormat<Void, StoreSalesTable>(
			new ParquetThriftInputFormat<StoreSalesTable>(), Void.class, StoreSalesTable.class, job);
		ParquetThriftInputFormat.addInputPath(job, new Path(storesalesPath));

		return env.createInput(hadoopInputFormat);
	}

	/**
	 * Reads the item table, projecting to i_item_sk/i_brand_id/i_brand/i_manager_id and
	 * pushing the predicate i_manager_id = 28 into the Parquet reader.
	 */
	private static DataSet<Tuple2<Void, ItemTable>> getItemDataSet(ExecutionEnvironment env) throws IOException {
		Job job = Job.getInstance();

		//Schema projection
		ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
		job.getConfiguration().set("parquet.thrift.column.filter", "i_item_sk;i_brand_id;i_brand;i_manager_id");

		// FIX: typed instead of raw HadoopInputFormat/ParquetThriftInputFormat
		HadoopInputFormat<Void, ItemTable> hadoopInputFormat = new HadoopInputFormat<Void, ItemTable>(
			new ParquetThriftInputFormat<ItemTable>(), Void.class, ItemTable.class, job);
		ParquetThriftInputFormat.addInputPath(job, new Path(itemPath));

		// Filter: i_manager_id = 28
		LongColumn managerId = longColumn("i_manager_id");
		FilterPredicate managerPred = eq(managerId, 28L);
		ParquetThriftInputFormat.setFilterPredicate(job.getConfiguration(), managerPred);

		return env.createInput(hadoopInputFormat);
	}

	/** Named tuple for date_dim: (d_date_sk, d_year, d_moy). */
	public static class DataDim extends Tuple3<Long, Long, Long> {
		public Long getD_date_sk() {
			return this.f0;
		}

		public Long getD_year() {
			return this.f1;
		}

		public Long getD_moy() {
			return this.f2;
		}
	}

	/** Named tuple for store_sales: (ss_sold_date_sk, ss_item_sk, ss_ext_sales_price). */
	public static class StoreSales extends Tuple3<Long, Long, Double> {
		public Long getSs_sold_date_sk() {
			return this.f0;
		}

		public Long getSs_item_sk() {
			return this.f1;
		}

		public Double getSs_ext_sales_price() {
			return this.f2;
		}
	}

	/** Named tuple for item: (i_item_sk, i_brand_id, i_brand, i_manager_id). */
	public static class Item extends Tuple4<Long, Long, String, Long> {
		public Long getI_item_sk() {
			return this.f0;
		}

		public Long getI_brand_id() {
			return this.f1;
		}

		public String getI_brand() {
			return this.f2;
		}

		public Long getI_manager_id() {
			return this.f3;
		}
	}
}
